library(mvtnorm)
library(fdadensity)
library(transport)
library(pracma)
library(dr)
## Loading required package: MASS
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(frechet)
require(tictoc)
## Loading required package: tictoc
##
## Attaching package: 'tictoc'
## The following objects are masked from 'package:pracma':
##
## clear, size, tic, toc
require(MASS)
require(magrittr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following objects are masked from 'package:pracma':
##
## and, mod, or
require(dplyr)
library(parallel)
library(ggplot2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
library(stringr)
library(statip)
##
## Attaching package: 'statip'
## The following object is masked from 'package:pracma':
##
## erf
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(latex2exp)
##
## Attaching package: 'latex2exp'
## The following object is masked from 'package:plotly':
##
## TeX
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tictoc)
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
library(fda)
## Loading required package: splines
## Loading required package: fds
## Loading required package: rainbow
## Loading required package: pcaPP
## Loading required package: RCurl
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
## The following objects are masked from 'package:tictoc':
##
## pop, push
## Loading required package: deSolve
##
## Attaching package: 'deSolve'
## The following object is masked from 'package:pracma':
##
## rk4
##
## Attaching package: 'fda'
## The following object is masked from 'package:graphics':
##
## matplot
library(rstudioapi)
library(rrpack)
library(EnvStats)
##
## Attaching package: 'EnvStats'
## The following objects are masked from 'package:statip':
##
## cv, predict
## The following object is masked from 'package:MASS':
##
## boxcox
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
library(devtools)
## Warning: package 'devtools' was built under R version 4.3.2
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.3.2
library(roxygen2)
## Warning: package 'roxygen2' was built under R version 4.3.2
rm(list=ls())
setwd(dirname(getActiveDocumentContext()$path))
source("functions.R")
ncores <- detectCores()
##– Example 1
## set parameters
set.seed(1)
n <- 100
p <- 10
rho <- 0.5
sig <- ar1cor(p,rho)
b1 <- c(1,1,rep(0,p-2))
beta <- cbind(b1)
d <- 1
## generate X
X <- rmvnorm(n, rep(0,p), sig)
## generate errors
set.seed(1)
eps <- rnorm(n,0,sd=0.5)
eps[c(10,15)] <- rnorm(2,5,sd=2)
## generate Y
Y <- 0.5 + X%*%b1 + eps
## calculate pairwise distances
Dy <- as.matrix(dist(Y,method="euclidean",diag=TRUE,upper=TRUE))
## Calculate surrogate response and metric Cook's distances
MCD <- mcd(X, Dy)
## OLS estimate based on the surrogate response
bhat <- coef(lm(MCD$Sy~X))[-1]
## Plot Cook's distances
cols <- rep(1,n)
cols[c(10,15)] <- 2
par(mfrow=c(2,2))
plot(Y~X%*%b1, xlab=expression(X~beta),col=cols, main="(A)")
plot(MCD$Sy~X%*%bhat, xlab=expression(X~hat(beta)), ylab=expression(S^Y),
main="(B)", col=cols, pch=18)
plot(lm(Y ~ X), 4, main="(C)")
plot(mcd~i, data=MCD$MCD.index, type="h", ylab="Cook's distance",
xlab="Obs. number", main="(D) Metric Cook's distance", ylim=c(0,1))
text(x=MCD$MCD.index$i[c(10,15)],y=MCD$MCD.index$mcd[c(10,15)],
labels=c(10,15),pos=3)
set.seed(1)
n <- 100
p <- 10
rho <- 0.5
sig <- ar1cor(p,rho)
X <- rmvnorm(n, rep(0,p), sig)
b1 <- c(1,-1,rep(0,p-2))
q=2
r=1
B = cbind(b1)%*%rbind(rortho(q)[1:r,])
sig_e <- matrix(c(1,0.7,0.7,1),nrow=2,byrow=T)
eps <- rmvnorm(n, rep(0,q), sig_e)
eps[10,1] <- rnorm(1,5,2)
eps[15,2] <- rnorm(1,-5,2)
Y <- sin(X%*%B) + eps
## 3D scatter plot of responses vs sufficient predictor
Ydf <- data.frame(y=Y,x=X%*%b1)
colnames(Ydf) <- c("y1","y2","x")
Ydf$col <- "n"
Ydf$col[c(10,15)] <- "y"
fig <- plot_ly(Ydf, x=~x, y=~y1, z=~y2, color=~col, colors=c('black','#BF382A'),
type='scatter3d', mode="markers") %>%
layout(scene = list(
xaxis=list(title="\u03B2<sup>T</sup>x",zerolinewidth=0),
yaxis=list(title="y<sub>1</sub>",zerolinewidth=0),
zaxis=list(title="y<sub>2</sub>",zerolinewidth=0)))
fig <- hide_colorbar(fig) %>% layout( showlegend = TRUE,
legend=list(title=list(text='<b> Outlier </b>')))
fig
## Calculate pairwise distances
Dy <- as.matrix(dist(Y, method="euclidean", diag=TRUE, upper=TRUE))
## Calculate surrogate response and metric Cook's distances
MCD <- mcd(X,Dy)
## Plot Cook's distances
par(mfrow=c(2,2))
plot(lm(Y[,1] ~ X), 4, main="(A)")
plot(lm(Y[,2] ~ X), 4, main="(B)")
plot(mcd~i, data=MCD$MCD.index, type="h", ylab="Cook's distance",
xlab="Obs. number", main="(c) Metric Cook's distance", ylim=c(0, 0.3))
text(x=MCD$MCD.index$i[c(10,15)],y=MCD$MCD.index$mcd[c(10,15)],
labels=c(10,15),pos=3)
##—- Example 3
n <- 100
p <- 10
m <- 50
b1 <- c(1,-1,rep(0,p-2))/sqrt(2)
d <- 1
set.seed(123)
X <- matrix(NA,n,p)
for(h in 1:n) X[h,] <- runif(p,0,1)
Y <- list()
for(v in 1:n ) Y[[v]] <- runif(m,-5*abs(X[v,]%*%b1),5*abs(X[v,]%*%b1))
Y[[10]] <- rexp(m,abs(X[10,]%*%b1))
Y[[15]] <- rnormMix(m,mean1=0,sd1=abs(X[15,]%*%b1),
mean2=(X[15,]%*%b1),sd2=5,p.mix = 0.5)
### plot densities for each response
dens0 <- lapply(Y, density)
dens <- data.frame(
x = unlist(lapply(dens0, "[[", "x")),
y = unlist(lapply(dens0, "[[", "y"))
)
xb <- round(c(X%*%b1),4)
cols <- rep("n",100)
cols[c(10,15)] <- "y"
Pred <- xb[rep(seq_len(length(xb)), each=length(dens0[[1]]$x))]
Cols <- cols[rep(seq_len(length(cols)), each=length(dens0[[1]]$x))]
dens_df <- cbind(dens,Pred,Cols)
axx <- list(title = "y")
axy <- list(title = "density")
axz <- list(title = list(text='\u03B2<sup>T</sup>x'))
fig <- plot_ly(dens_df, x=~x, y=~y, z=~Pred, split=~Pred, color=~Cols,
colors=c('black','#BF382A'),type='scatter3d', mode='lines') %>%
layout(scene=list(xaxis=axx,yaxis=axy,zaxis=axz))
fig <- hide_colorbar(fig) %>% layout(showlegend = FALSE)
fig
## Compute pairwise distances
Dy <- wdist(Y)
## Calculate surrogate response and metric Cook's distances
MCD <- mcd(X,Dy)
## Plot Cook's distances
par(mfrow=c(1,1))
plot(mcd~i, data=MCD$MCD.index, type="h", ylab="Cook's distance",
xlab="Obs. number", main="(D) Metric Cook's distance", ylim=c(0,1))
text(x=MCD$MCD.index$i[c(10,15)],y=MCD$MCD.index$mcd[c(10,15)],
labels=c(10,15),pos=3)
##—- Example 4
set.seed(123)
n <- 100
p <- 10
rho <- 0
sig <- ar1cor(p,rho)
X <- rmvnorm(n, rep(0,p), sig)
b1 <- c(1,-1,rep(0,p-2))/sqrt(2)
m <- 30
TT <- sort(round(runif(m,0,10), 4)) # Functional data support
mu <- 10*cos(pi + pi*TT/5)
mu_t <- matrix(mu,n,m, byrow=TRUE)
Y <- list()
for (i in seq_len(n)) {
Y[[i]] <- mu_t[i,] + c(5*sin(X[i,]%*%b1)) + rnorm(1)
}
Y[[10]][seq(5,25,5)] <- Y[[10]][seq(5,25,5)] + c(5*exp(2+X[seq(5,25,5),]%*%b1))
Y[[15]] <- mu_t[15,] + c(5*sin(X[15,]%*%b1)) - 2*rnorm(m,0,10)
## Compute pairwise distances
Dy <- tsdist(Y)
## Calculate surrogate response and metric Cook's distances
MCD <- mcd(X,Dy)
## Plot Cook's distances
par(mfrow=c(1,2))
matplot(do.call(cbind,Y),type='l',xlab="times (t)",
main="Functional responses", ylab=expression(y(t)))
plot(mcd~i, data=MCD$MCD.index, type="h", ylab="Cook's distance",
xlab="Obs. number", main="(D) Metric Cook's distance", ylim=c(0,1))
text(x=MCD$MCD.index$i[c(10,15)],y=MCD$MCD.index$mcd[c(10,15)],
labels=c(10,15),pos=3)